Visualization of running data

Preliminar analysis and data exploration

This is a wip notebook

not part of the final delivery file.

We include this only if the professor would like to take an eye in the first tries of exploration and visualization that we did as a preliminary part of our project.

In [1]:
import lxml.etree as etree
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
In [2]:
xtree = etree.parse('./Data/activities/264774936.tcx')
xroot = xtree.getroot()

#ns is the default namespace of the tree
ns = {'d': 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2'}

#Since Activity is always one element, we put the [0] to take the element out of the list
Activity = xroot.findall(".//d:Activity", ns)[0] 

Creating the dataframe from the raw data:

In [3]:
df_cols = ["time", "latitude", "longitude", "altitude", "distance", "heartRate", "lap"]
rows = []

lapnum = 0; 

for lap in Activity.findall("./d:Lap", ns):
    for elem in lap.findall("./d:Track/d:Trackpoint", ns):

        #don't know if float and int conversion are necessary 
        s_time = etree.tostring(elem.find("./d:Time", ns), method="text").strip().decode("utf-8") 
        s_latitude = float(etree.tostring(elem.find("./d:Position/d:LatitudeDegrees", ns), method="text").strip().decode("utf-8"))
        s_longitude = float(etree.tostring(elem.find("./d:Position/d:LongitudeDegrees", ns), method="text").strip().decode("utf-8")) 
        s_altitude = float(etree.tostring(elem.find("./d:AltitudeMeters", ns), method="text").strip().decode("utf-8")) 
        s_distance = float(etree.tostring(elem.find("./d:DistanceMeters", ns), method="text").strip().decode("utf-8")) 
        s_heartRate = int(etree.tostring(elem.find("./d:HeartRateBpm/d:Value", ns), method="text").strip().decode("utf-8")) 

        rows.append({"time": s_time, "latitude": s_latitude, 
                     "longitude": s_longitude, "altitude": s_altitude,
                     "distance": s_distance, "heartRate": s_heartRate, 
                     "lap": lapnum})
    lapnum = lapnum +1 

out_df = pd.DataFrame(rows, columns = df_cols)
out_df['time'] = pd.to_datetime(out_df.time)
out_df.set_index('time')
Out[3]:
latitude longitude altitude distance heartRate lap
time
2014-12-26 07:37:25+00:00 45.437115 9.243325 109.577 0.0 136 0
2014-12-26 07:37:26+00:00 45.437116 9.243332 109.577 0.0 136 0
2014-12-26 07:37:27+00:00 45.437117 9.243331 109.425 0.0 136 0
2014-12-26 07:37:28+00:00 45.437115 9.243327 109.425 0.0 136 0
2014-12-26 07:37:29+00:00 45.437111 9.243319 109.272 0.0 137 0
... ... ... ... ... ... ...
2014-12-26 08:21:47+00:00 45.437315 9.243202 105.310 7987.6 155 7
2014-12-26 08:21:48+00:00 45.437322 9.243221 105.310 7990.0 155 7
2014-12-26 08:21:49+00:00 45.437334 9.243256 105.310 7991.9 155 7
2014-12-26 08:21:50+00:00 45.437334 9.243258 105.310 7994.4 154 7
2014-12-26 08:21:51+00:00 45.437334 9.243263 105.310 7995.3 154 7

2667 rows × 6 columns

In [4]:
out_df.describe()
Out[4]:
latitude longitude altitude distance heartRate lap
count 2667.000000 2667.000000 2667.000000 2667.000000 2667.000000 2667.000000
mean 45.436484 9.243569 108.017831 3910.939558 167.373828 3.491189
std 0.000662 0.001442 2.315270 2388.036242 10.868965 2.401280
min 45.435254 9.241068 102.871000 0.000000 136.000000 0.000000
25% 45.435908 9.242369 106.376000 1744.500000 156.000000 1.000000
50% 45.436563 9.243557 108.358000 3840.700000 173.000000 3.000000
75% 45.437117 9.244735 109.729000 6014.750000 176.000000 6.000000
max 45.437525 9.246128 112.473000 7995.300000 182.000000 7.000000
In [5]:
plot = out_df.plot(title='heartrate by distance', x ='distance', y='heartRate', kind = 'line', figsize=(15,5))

#exploring if there is some correlation between the change of a lap and a change in heartrate
df_startlap = out_df.groupby(by='lap').first()

#for index, row in df_startlap.iterrows():
#   plot.axvline(x=row['time'], color='r', linestyle='--')
#df.head()

df_startlap.plot(title='first heartrate in lap', y='heartRate', kind = 'line', figsize=(15,5))
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x25461c476c8>

seems like there's no corellation between the start of a lap and the heartrate, probably lap is just something that gets pushed by the user.

We now try to enrich the dataset with the instantaneous pace calculated with distance and time and understand if it's a reliable way to estimate the actual pace

In [6]:
#initial pace of the first element is arbitrarily set to 0
out_df.loc[0, 'Instant_pace'] = 0
#hypothesizing that all records are at 1 second distance from each other
for x in range(1, len(out_df)):
    out_df.loc[x, 'Instant_pace'] = (out_df.loc[x, 'distance'] - out_df.loc[x-1, 'distance']) * 3.6 #conversion from m/s to km/h
out_df
Out[6]:
time latitude longitude altitude distance heartRate lap Instant_pace
0 2014-12-26 07:37:25+00:00 45.437115 9.243325 109.577 0.0 136 0 0.00
1 2014-12-26 07:37:26+00:00 45.437116 9.243332 109.577 0.0 136 0 0.00
2 2014-12-26 07:37:27+00:00 45.437117 9.243331 109.425 0.0 136 0 0.00
3 2014-12-26 07:37:28+00:00 45.437115 9.243327 109.425 0.0 136 0 0.00
4 2014-12-26 07:37:29+00:00 45.437111 9.243319 109.272 0.0 137 0 0.00
... ... ... ... ... ... ... ... ...
2662 2014-12-26 08:21:47+00:00 45.437315 9.243202 105.310 7987.6 155 7 9.36
2663 2014-12-26 08:21:48+00:00 45.437322 9.243221 105.310 7990.0 155 7 8.64
2664 2014-12-26 08:21:49+00:00 45.437334 9.243256 105.310 7991.9 155 7 6.84
2665 2014-12-26 08:21:50+00:00 45.437334 9.243258 105.310 7994.4 154 7 9.00
2666 2014-12-26 08:21:51+00:00 45.437334 9.243263 105.310 7995.3 154 7 3.24

2667 rows × 8 columns

it's almost reliable, but there are many huge drop followed by a huge increase, probably it's better to understand if it's a normal behaviour or if there is the need to "normalize" the data. We try to do this by increasing the windows size. Increasing the windows it's only a way to do this, probably we could also try to normalize by means some kind of averaging.

In [7]:
#try with a window size of 3
out_df_norm = out_df

out_df_norm.loc[0, 'Instant_pace'] = 0
out_df_norm.loc[1, 'Instant_pace'] = 0
out_df_norm.loc[2, 'Instant_pace'] = 0

#hypothesizing that all records are at 1 second distance from each other
for i in range(3, len(out_df)):
    out_df_norm.loc[i, 'Instant_pace'] = ((out_df_norm.loc[i, 'distance'] - out_df_norm.loc[i-3, 'distance']) / 3) * 3.6 #conversion to km/h

out_df_norm.plot(title='instant_pace by time in km/h', x ='time', y='Instant_pace', kind = 'line', figsize=(15,5))

#try with a window size of w
out_df_norm = out_df
w = 10
out_df_norm.loc[0, 'Instant_pace'] = 0

#hypothesizing that all records are at 1 second distance from each other
for i in range(1, w):
    out_df_norm.loc[i, 'Instant_pace'] = ((out_df_norm.loc[i, 'distance'] - out_df_norm.loc[0, 'distance']) / i) * 3.6 #conversion to km/h
for i in range(w, len(out_df)):
    out_df_norm.loc[i, 'Instant_pace'] = ((out_df_norm.loc[i, 'distance'] - out_df_norm.loc[i-w, 'distance']) / w) * 3.6 #conversion to km/h

out_df_norm.plot(title='instant_pace by time in km/h', x ='time', y='Instant_pace', kind = 'line', figsize=(15,5))
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x25461dc7808>

also with a huge normalization factor there are drops and rise, we could check if they are due to other factors!

In [8]:
#comparing pace with altitude

out_df.plot(title='instant_pace by time', x ='time', y='Instant_pace', kind = 'line', figsize=(15,5))
out_df.plot(title='altitude by time', x ='time', y='altitude', kind = 'line', figsize=(15,5))
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x2546219d048>

altitude remains almost the same during the entire run, this cannot be a factor

In [9]:
#to reach something pseudo stable we would need to normalize a lot
#try with a window size of w
out_df_norm = out_df
w = 60
#for x in range(0, w):
out_df_norm.loc[0, 'Instant_pace'] = 0

#hypothesizing that all records are at 1 second distance from each other
for i in range(1, w):
    out_df_norm.loc[i, 'Instant_pace'] = ((out_df_norm.loc[i, 'distance'] - out_df_norm.loc[0, 'distance']) / i) * 3.6 #conversion to km/h
for i in range(w, len(out_df)):
    out_df_norm.loc[i, 'Instant_pace'] = ((out_df_norm.loc[i, 'distance'] - out_df_norm.loc[i-w, 'distance']) / w) * 3.6 #conversion to km/h

out_df_norm.plot(title='instant_pace by time', x ='time', y='Instant_pace', kind = 'line', figsize=(15,5))
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x254622a7c08>

hypothesis:

1 - data are really imprecise, need of a huge normalization factor like the last one

2 - the runner is doing repetitive training (ripetute) and it's normal to have this jump in the data

let's load another dataset to compare

DATASET 2

In [10]:
xtree = etree.parse('./Data/activities/1066731959.tcx')
xroot = xtree.getroot()

#ns is the default namespace of the tree
ns = {'d': 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2'}

#Since Activity is always one element, we put the [0] to take the element out of the list
Activity = xroot.findall(".//d:Activity", ns)[0] 

df_cols = ["time", "latitude", "longitude", "altitude", "distance", "heartRate", "lap"]
rows = []

lapnum = 0; 

for lap in Activity.findall("./d:Lap", ns):
    for elem in lap.findall("./d:Track/d:Trackpoint", ns):

        #don't know if float and int conversion are necessary 
        s_time = etree.tostring(elem.find("./d:Time", ns), method="text").strip().decode("utf-8") 
        s_latitude = float(etree.tostring(elem.find("./d:Position/d:LatitudeDegrees", ns), method="text").strip().decode("utf-8"))
        s_longitude = float(etree.tostring(elem.find("./d:Position/d:LongitudeDegrees", ns), method="text").strip().decode("utf-8")) 
        s_altitude = float(etree.tostring(elem.find("./d:AltitudeMeters", ns), method="text").strip().decode("utf-8")) 
        s_distance = float(etree.tostring(elem.find("./d:DistanceMeters", ns), method="text").strip().decode("utf-8")) 
        s_heartRate = int(etree.tostring(elem.find("./d:HeartRateBpm/d:Value", ns), method="text").strip().decode("utf-8")) 

        rows.append({"time": s_time, "latitude": s_latitude, 
                     "longitude": s_longitude, "altitude": s_altitude,
                     "distance": s_distance, "heartRate": s_heartRate, 
                     "lap": lapnum})
    lapnum = lapnum +1 

out_df2 = pd.DataFrame(rows, columns = df_cols)
out_df2['time'] = pd.to_datetime(out_df2.time)
out_df2.set_index('time')
Out[10]:
latitude longitude altitude distance heartRate lap
time
2017-05-01 06:20:41+00:00 45.437266 9.243268 95.251 0.300000 115 0
2017-05-01 06:20:42+00:00 45.437255 9.243260 95.099 0.300000 115 0
2017-05-01 06:20:43+00:00 45.437244 9.243244 95.099 0.400000 115 0
2017-05-01 06:20:44+00:00 45.437232 9.243225 94.946 1.300000 116 0
2017-05-01 06:20:45+00:00 45.437220 9.243203 94.946 2.500000 116 0
... ... ... ... ... ... ...
2017-05-01 07:06:31+00:00 45.437214 9.243541 106.072 6981.600096 148 6
2017-05-01 07:06:32+00:00 45.437198 9.243517 106.224 6984.350096 148 6
2017-05-01 07:06:33+00:00 45.437182 9.243492 106.224 6987.100096 149 6
2017-05-01 07:06:34+00:00 45.437166 9.243468 106.224 6989.850096 148 6
2017-05-01 07:06:35+00:00 45.437153 9.243445 106.224 6992.600096 148 6

2755 rows × 6 columns

In [11]:
#trying to add instant pace to the dataset and understand if it's a reliable way to estimate the actual pace
out_df2.loc[0, 'Instant_pace'] = 0

#hypothesizing that all records are at 1 second distance from each other
for i in range(1, len(out_df2)):
    out_df2.loc[i, 'Instant_pace'] = (out_df2.loc[i, 'distance'] - out_df2.loc[i-1, 'distance']) * 3.6 #conversion to km/h

out_df2.plot(title='speed by time in km/h', x ='time', y='Instant_pace', kind = 'line', figsize=(15,5))
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x2546030d508>
In [12]:
#WE COULD REUSE PRECEDENT DATASET AND ADD ONLY A COLUMN "NORMALIZED"
#try with a window size of w
out_df2_norm = out_df2
w = 60
out_df2_norm.loc[0, 'Instant_pace'] = 0

#hypothesizing that all records are at 1 second distance from each other
for i in range(1, w):
    out_df2_norm.loc[i, 'Instant_pace'] = ((out_df2_norm.loc[i, 'distance'] - out_df2_norm.loc[0, 'distance']) / i) * 3.6 #conversion to km/h
for i in range(w, len(out_df2)):
    out_df2_norm.loc[i, 'Instant_pace'] = ((out_df2_norm.loc[i, 'distance'] - out_df2_norm.loc[i-w, 'distance']) / w) * 3.6 #conversion to km/h

out_df2_norm.plot(title='instant_pace by time in km/h', x ='time', y='Instant_pace', kind = 'line', figsize=(15,5))
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x254626c9f88>

DATASET PERE

try of the visualization of the pace in blocks of discrete distance

We now try to build a dataset by discretizing in blocks of about a fixed distance (100 m) the original dataset (out_df). We keep the distance and time values as they were in the out_df, we calculate the heartRate as the average of all the values inside the block. Finally we calculate the speed as:

(time_at_end_of_block - time_at_start_of_block)/ (distance_at_end_of_block - distance_at_start_of_block)

and use some conversion values (from second to minute and from meters to km).

Notice that the speed is now calculated as "minutes/KM" as requested by the professor, since the exploratory phase is almost ended.

As a drawback of this method to normalize the speed there is the fact that we "lose" a lot of data point that gets summarized, resulting in less accuracy. Furthermore, not all the blocks are composed by the same number of data points from the original dataset.

In [13]:
#hypothesizing that all records are at 1 second distance from each other
speed_df = pd.DataFrame(columns = ['speed', 'distance', 'time', 'heartRate']) 
i = 0
x = 0
heartsum = [0 , 1]
heartsum[0] = out_df.loc[x, 'heartRate']
#sample distance, time, heartrate from original dataset
while (i*100 <= float(out_df.tail(1)['distance'])): 
    if (out_df.loc[x, 'distance'] >= i*100):
        speed_df.loc[i, 'distance'] = out_df.loc[x, 'distance']
        speed_df.loc[i, 'time'] = x
        speed_df.loc[i, 'heartRate'] = heartsum[0] / heartsum[1]
        heartsum = [0, 0]
        i = i+1
    heartsum[0] = heartsum[0] + out_df.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x+1

#calculate speed
for x in range(1, len(speed_df)):
    speed_df.loc[x, 'speed'] = ((speed_df.loc[x, 'time'] - speed_df.loc[x-1, 'time'])/60 ) /  ((speed_df.loc[x, 'distance'] - speed_df.loc[x-1, 'distance'])/1000)

speed_df.plot(title='pace by minute in minutes/km', x = 'distance', y='speed', kind = 'line', figsize=(15,5))
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x25463dba388>

try of the visualization of the pace in min/km with sliding windows

We now try to resort to the sliding windows approach with the pace calculated as min/km. To calculate this value, similarly to what we did before, we set a window of 60 seconds, so to enrich each data point of the original dataset with the average speed in the 60 seconds before the considered point.

In the first 60 seconds, the size of the window is set to the number of second passed from the beginning, and the distance is calculated from the first point.

If the distance between two point is zero, for example if the runner stopped for a while, the pace is set to NaN, because otherwise it would go to infinite. The same is done with values of speed really low (<10 min/km) in order to not introduce noise in the visualizations.

In [14]:
out_df_norm = out_df_norm.sort_values("time").reset_index(drop= True)
out_df_norm.loc[0, 'pace min/km'] = np.nan
w = 60

#hypothesizing that all records are at 1 second distance from each other
for x in range(1, w):
    if (((out_df_norm.loc[x, 'distance'] - out_df_norm.loc[0, 'distance']) >0) and ((x/60)/((out_df_norm.loc[x, 'distance'] - out_df_norm.loc[0, 'distance'])/1000) <10)):
        out_df_norm.loc[x, 'pace min/km'] = (x/60)/((out_df_norm.loc[x, 'distance'] - out_df_norm.loc[0, 'distance'])/1000)
    else:
        out_df_norm.loc[x, 'pace min/km'] = np.nan
for x in range(w, len(out_df)):
    if (out_df_norm.loc[x, 'distance'] - out_df_norm.loc[x-w, 'distance']):
        out_df_norm.loc[x, 'pace min/km'] = (w/60)/((out_df_norm.loc[x, 'distance'] - out_df_norm.loc[x-w, 'distance'])/1000)
    else:
        out_df_norm.loc[x, 'pace min/km'] = np.nan
#out_df_norm.plot(title='pace by minute in km/h', x = 'distance', y='Instant_pace', kind = 'line', figsize=(15,5))
out_df_norm.plot(title='pace by minute in minutes/km', x = 'distance', y='pace min/km', kind = 'line', figsize=(15,5))
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x25463c7ea08>

Visualization tries

In [15]:
#try della visualizzazione 
out_df.plot(title='instant_pace by heartbeat in km/h', x ='heartRate', y='Instant_pace', kind = 'scatter', figsize=(15,5))
#si potrebbe pensare di visualizzare comunque il time con il colore, o fare delle cose simili a degli snake plot
Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x25463c4a488>
In [16]:
#import
import plotly
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
In [17]:
trace_one = go.Scatter(
            x=out_df.time,
            y=out_df['heartRate'],
            name= "heartRate vs time",
            line = dict(color='#17BECF'),
            opacity = 0.8)

data = [trace_one]


layout = dict(
        title = 'heartRate vs time')


fig = dict(data=data, layout=layout)
iplot(fig, filename = 'heartRate vs time')
In [18]:
trace_one = go.Scatter(
            x=out_df.distance,
            y=out_df['Instant_pace'],
            name= "Instant_pace vs distance",
            line = dict(color='#17BECF'),
            opacity = 0.8)

data = [trace_one]


layout = dict(
        title = 'Instant_pace vs distance')


fig = dict(data=data, layout=layout)
iplot(fig, filename = 'Instant_pace vs distance')
In [19]:
trace = go.Scatter(
    y = out_df_norm['heartRate'],
    x = out_df_norm['pace min/km'],
    mode = 'markers')
data = [trace]
iplot(data)

trace1 = go.Bar( 
        x = out_df_norm['heartRate'],
        y = out_df_norm['pace min/km'],
        name= 'Rounds Played')
data = [trace1]
layout = go.Layout(barmode='group')

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='grouped-bar')
In [20]:
trace1 = go.Bar( 
        x = out_df['heartRate'],
        y = out_df['Instant_pace'],
        name= 'Rounds Played')
data = [trace1]
layout = go.Layout(barmode='group')

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='grouped-bar')
In [21]:
y = out_df_norm['heartRate']
x = out_df_norm['pace min/km']
iplot([go.Histogram2dContour(x=x, y=y, contours=dict(coloring='heatmap')),
       go.Scatter(x=x, y=y, mode='markers', marker=dict(color='white', size=3, opacity=0.3))], show_link=False)
In [22]:
y = speed_df['heartRate']
x = speed_df['speed']#cambia nome da speed a pace in min/km
iplot([go.Histogram2dContour(x=x, y=y, contours=dict(coloring='heatmap')),
       go.Scatter(x=x, y=y, mode='markers', marker=dict(color='white', size=3, opacity=0.3))], show_link=False)
In [23]:
import plotly.figure_factory as ff

y = out_df_norm['heartRate']
x = out_df_norm['pace min/km']
#z = df_bar_pubg['squad_Heals']

colorscale = ['#7A4579', '#D56073', 'rgb(236,158,105)', (1, 1, 0.2), (0.98,0.98,0.98)]

fig = ff.create_2d_density(
    x, y, colorscale=colorscale,
    hist_color='rgb(0, 0, 255)', point_size=1
)


iplot(fig, filename='histogram_subplots')
In [24]:
import chart_studio.plotly as py
import plotly.graph_objs as go
import chart_studio
import pandas as pd
In [25]:
# setting user, api key and access token
chart_studio.tools.set_credentials_file(username='pavmassimo', api_key='YLSreh15YEIWWNq75Lxx')
mapbox_access_token = 'pk.eyJ1IjoicGF2bWFzc2ltbyIsImEiOiJja2RzeGh4cHYxc2w1MnJ0YWczZndyZG50In0.GKQETZ2daXd-VrybJ571DQ'
In [26]:
data = []
laps = [0, 1, 2, 3, 4, 5, 6, 7, 8]
for lap in laps:
    lap_data = dict(
            lat = out_df.loc[out_df['lap'] == lap,'latitude'],
            lon = out_df.loc[out_df['lap'] == lap,'longitude'],
            name = lap,
            marker = dict(size = 8, opacity = 0.8),
            type = 'scattermapbox'
        )
    data.append(lap_data)
In [27]:
layout = dict(
    height = 800,
    # top, bottom, left and right margins
    margin = dict(t = 0, b = 0, l = 0, r = 0),
    font = dict(color = '#FFFFFF', size = 11),
    paper_bgcolor = '#000000',
    mapbox = dict(
        # here you need the token from Mapbox
        accesstoken = mapbox_access_token,
        bearing = 0,
        # where we want the map to be centered
        center = dict(
            lat = 45.437,
            lon = 9.243
        ),
        # we want the map to be "parallel" to our screen, with no angle
        pitch = 0,
        # default level of zoom
        zoom = 15,
        # default map style
        style = 'dark'
    )
)
In [28]:
annotations = [dict(
  
              # text I want to display. I used <br> to break it into two lines
              text = 'All US storm events that caused more than $50k of economic damage,<br> from 2000 until today', 
              
              # font and border characteristics
              font = dict(color = '#FFFFFF', size = 14), borderpad = 10, 
              
              # positional arguments
              x = 0.05, y = 0.05, xref = 'paper', yref = 'paper', align = 'left', 
              
              # don't show arrow and set background color
              showarrow = False, bgcolor = 'black'
              )]

# assigning the annotations to the layout
layout['annotations'] = annotations
In [29]:
updatemenus=list([
    # drop-down 1: map styles menu
    # buttons containes as many dictionaries as many alternative map styles I want to offer
    dict(
        buttons=list([
            dict(
                args=['mapbox.style', 'dark'],
                label='Dark',
                method='relayout'
            ),                    
            dict(
                args=['mapbox.style', 'light'],
                label='Light',
                method='relayout'
            ),
            dict(
                args=['mapbox.style', 'outdoors'],
                label='Outdoors',
                method='relayout'
            ),
            dict(
                args=['mapbox.style', 'satellite-streets'],
                label='Satellite with Streets',
                method='relayout'
            )                    
        ]),
        # direction where I want the menu to expand when I click on it
        direction = 'up',
      
        # here I specify where I want to place this drop-down on the map
        x = 0.75,
        xanchor = 'left',
        y = 0.05,
        yanchor = 'bottom',
      
        # specify font size and colors
        bgcolor = '#000000',
        bordercolor = '#FFFFFF',
        font = dict(size=11)
    ),    
    
    # drop-down 2: select type of storm event to visualize
    dict(
         # for each button I specify which dictionaries of my data list I want to visualize. Remember I have 7 different
         # types of storms but I have 8 options: the first will show all of them, while from the second to the last option, only
         # one type at the time will be shown on the map
         buttons=list([
            dict(label = 'All Laps',
                 method = 'update',
                 args = [{'visible': [True, True, True, True, True, True, True]}]),
            dict(label = '0',
                 method = 'update',
                 args = [{'visible': [True, False, False, False, False, False, False]}]),
            dict(label = '1',
                 method = 'update',
                 args = [{'visible': [False, True, False, False, False, False, False]}]),
             dict(label = '2',
                 method = 'update',
                 args = [{'visible': [False, False, True, False, False, False, False]}]),
             dict(label = '3',
                 method = 'update',
                 args = [{'visible': [False, False, False, True, False, False, False]}]),
             dict(label = '4',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, True, False, False]}]),
             dict(label = '5',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, True, False]}]),
             dict(label = '6',
                 method = 'update',
                 args = [{'visible': [False, False, False, False, False, False, True]}])
          
        ]),
        # direction where the drop-down expands when opened
        direction = 'down',
        # positional arguments
        x = 0.01,
        xanchor = 'left',
        y = 0.99,
        yanchor = 'bottom',
        # fonts and border
        bgcolor = '#000000',
        bordercolor = '#FFFFFF',
        font = dict(size=11)
    )
])

# assign the list of dictionaries to the layout dictionary
layout['updatemenus'] = updatemenus
In [30]:
layout['title'] = 'Pere miscredente'
In [31]:
data1 = []
# /1000 because distance is registered in meters
kms = [item for item in range(0, int(max(out_df['distance'])/1000)+1)] 
for km in kms:
    km_data = dict(
            lat = out_df.loc[(((out_df['distance'] < (km+1) *1000) & (out_df['distance'] >= (km*1000)))),'latitude'],
            lon = out_df.loc[(((out_df['distance'] < (km+1) *1000) & (out_df['distance'] >= (km*1000)))),'longitude'],
            customdata = out_df.loc[(((out_df['distance'] < (km+1) *1000) & (out_df['distance'] >= (km*1000)))),'distance'],
            hovertemplate ="%{customdata}",
            name = km,
            marker = dict(size = 8, opacity = 0.8),
            type = 'scattermapbox'
        )
    data1.append(km_data)
In [32]:
layout = dict(
    height = 800,
    # top, bottom, left and right margins
    margin = dict(t = 0, b = 0, l = 0, r = 0),
    font = dict(color = '#FFFFFF', size = 11),
    paper_bgcolor = '#000000',
    mapbox = dict(
        # here you need the token from Mapbox
        accesstoken = mapbox_access_token,
        bearing = 0,
        # where we want the map to be centered
        center = dict(
            lat = out_df['latitude'][0],
            lon = out_df['longitude'][0]
        ),
        # we want the map to be "parallel" to our screen, with no angle
        pitch = 0,
        # default level of zoom
        zoom = 15,
        # default map style
        style = 'dark'
    )
)
In [33]:
"""annotations = [dict(
  
              # text I want to display. I used <br> to break it into two lines
              text = 'All US storm events that caused more than $50k of economic damage,<br> from 2000 until today', 
              
              # font and border characteristics
              font = dict(color = '#FFFFFF', size = 14), borderpad = 10, 
              
              # positional arguments
              x = 0.05, y = 0.05, xref = 'paper', yref = 'paper', align = 'left', 
              
              # don't show arrow and set background color
              showarrow = False, bgcolor = 'black'
              )]

# assigning the annotations to the layout
layout['annotations'] = annotations
"""
Out[33]:
"annotations = [dict(\n  \n              # text I want to display. I used <br> to break it into two lines\n              text = 'All US storm events that caused more than $50k of economic damage,<br> from 2000 until today', \n              \n              # font and border characteristics\n              font = dict(color = '#FFFFFF', size = 14), borderpad = 10, \n              \n              # positional arguments\n              x = 0.05, y = 0.05, xref = 'paper', yref = 'paper', align = 'left', \n              \n              # don't show arrow and set background color\n              showarrow = False, bgcolor = 'black'\n              )]\n\n# assigning the annotations to the layout\nlayout['annotations'] = annotations\n"
In [34]:
visibility = [False for item in range(0, int(max(out_df['distance'])/1000)+1)]
visibility_all = [True for item in range(0, int(max(out_df['distance'])/1000)+1)]

 # for each button I specify which dictionaries of my data list I want to visualize. Remember I have n different
 # kms to visualize but I have n+1 options: the first will show all of them, while from the second to the last option, only
 # one km at the time will be shown on the map
buttons=list([
    dict(label = 'All kms',
         method = 'update',
         args = [{'visible': visibility_all}])
])

for item in range(0, int(max(out_df['distance'])/1000)+1):
    visibility_partial = visibility.copy()
    visibility_partial[item] = True;
    buttons.append(
        dict(label = item,
         method = 'update',
         args = [{'visible': visibility_partial}])
    )
     # drop-down 2: select km to visualize
viz =dict(
        buttons = buttons,
        # direction where the drop-down expands when opened
        direction = 'down',
        # positional arguments
        x = 0.01,
        xanchor = 'left',
        y = 0.99,
        yanchor = 'bottom',
        # fonts and border
        bgcolor = '#000000',
        bordercolor = '#FFFFFF',
        font = dict(size=11)
    )

updatemenus=list([
    # drop-down 1: map styles menu
    # buttons containes as many dictionaries as many alternative map styles I want to offer
    dict(
        buttons=list([
            dict(
                args=['mapbox.style', 'dark'],
                label='Dark',
                method='relayout'
            ),                    
            dict(
                args=['mapbox.style', 'light'],
                label='Light',
                method='relayout'
            ),
            dict(
                args=['mapbox.style', 'outdoors'],
                label='Outdoors',
                method='relayout'
            ),
            dict(
                args=['mapbox.style', 'satellite-streets'],
                label='Satellite with Streets',
                method='relayout'
            )                    
        ]),
        # direction where I want the menu to expand when I click on it
        direction = 'up',
      
        # here I specify where I want to place this drop-down on the map
        x = 0.75,
        xanchor = 'left',
        y = 0.05,
        yanchor = 'bottom',
      
        # specify font size and colors
        bgcolor = '#000000',
        bordercolor = '#FFFFFF',
        font = dict(size=11)
    ),    
    viz
])

# assign the list of dictionaries to the layout dictionary
layout['updatemenus'] = updatemenus

VISUAL COMPARISON WITH SPEED DF BPM PER KM

In [35]:
avg_hr = [] 
sum_hr = 0
for x in range(len(speed_df)):
    sum_hr = sum_hr + speed_df.loc[x, 'heartRate']
    if ((x+1) % 10 == 0) :
        avg_hr.append(sum_hr/10)
        sum_hr = 0
avg_hr2 = []
for x in range(len(avg_hr)):
    avg_hr2.append(avg_hr[x]* (-1))    
y = list(range(1, 10, 1))

layout = go.Layout(yaxis=go.layout.YAxis(title=''),
                   xaxis=go.layout.XAxis(
                       range=[-220, 220],
                       tickvals=[-200, -100, -50, 0, 50, 100, 200],
                       ticktext=[200, 100, 50, 0, 50, 100, 200],
                       title='Number'),
                   barmode='overlay',
                   bargap=0.1)

data = [go.Bar(y=y,
               x=avg_hr,
               orientation='h',
               name='Men',
               text= np.array(avg_hr).astype('int'),
               hoverinfo='text',
               marker=dict(color='powderblue')
               ),
        go.Bar(y=y,
               x=avg_hr2,
               orientation='h',
               name='Women',
               text=-1 * np.array(avg_hr2).astype('int'),
               hoverinfo='text',
               marker=dict(color='seagreen')
               )]

py.iplot(dict(data=data, layout=layout))
Out[35]:

VISUAL COMPARISON WITH OUT_DF BPM PER KM

In [36]:
avg_hr = [] 

i = 1
x = 0
heartsum = [0 , 0]
heartsum[0] = out_df.loc[x, 'heartRate']
#sample distance from original dataset
while (x < len(out_df)): 
    if (out_df.loc[x, 'distance'] >= i*1000):
        if (heartsum[1] != 0):
            avg_hr.append( heartsum[0] / heartsum[1])
        else:
            avg_hr.append(0)
        heartsum = [0, 0]
        i = i+1
    heartsum[0] = heartsum[0] + out_df.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x+1
avg_hr.append( heartsum[0] / heartsum[1])
print(avg_hr)
avg_hr2 = []
for x in range(len(avg_hr)):
    avg_hr2.append(avg_hr[x]* (-1))    

y = list(range(1, 20, 1))

layout = go.Layout(yaxis=go.layout.YAxis(title='KM'),
                   xaxis=go.layout.XAxis(
                       range=[-220, 220],
                       tickvals=[-200, -100, -50, 0, 50, 100, 200],
                       ticktext=[200, 100, 50, 0, 50, 100, 200],
                       title='BPM'),
                   barmode='overlay',
                   bargap=0.1)

data = [go.Bar(y=y,
               x=avg_hr,
               orientation='h',
               name='RUN A',
               text= np.array(avg_hr).astype('int'),
               hoverinfo='text',
               marker=dict(color='powderblue')
               ),
        go.Bar(y=y,
               x=avg_hr2,
               orientation='h',
               name='RUN B',
               text=-1 * np.array(avg_hr2).astype('int'),
               hoverinfo='text',
               marker=dict(color='seagreen')
               )]

py.iplot(dict(data=data, layout=layout))
[153.9477806788512, 151.29023746701847, 172.98697068403908, 173.60128617363344, 177.8705501618123, 176.74918566775244, 177.13183279742765, 163.36666666666667]
Out[36]:
In [37]:
out_df_norm = out_df_norm.sort_values("Instant_pace")
out_df_norm = out_df_norm.reset_index(drop = True)

avg_hr = [] 

i = 1
x = 0
heartsum = [0 , 0]
#heartsum[0] = out_df.loc[x, 'heartRate']
#sample distance from original dataset
while (x < len(out_df_norm)): 
    while (out_df_norm.loc[x, 'Instant_pace'] >= i):
        if (heartsum[1] != 0):
            avg_hr.append( heartsum[0] / heartsum[1])
        else:
            avg_hr.append(0)
        heartsum = [0, 0]
        i = i+1
    
    heartsum[0] = heartsum[0] + out_df_norm.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x+1
avg_hr.append( heartsum[0] / heartsum[1])

print(avg_hr)
avg_hr2 = []
for x in range(len(avg_hr)):
    avg_hr2.append(avg_hr[x]* (-1))    

y = list(range(1, 20, 1))

layout = go.Layout(yaxis=go.layout.YAxis(title='KM/H'),
                   xaxis=go.layout.XAxis(
                       range=[-220, 220],
                       tickvals=[-200, -100, -50, 0, 50, 100, 200],
                       ticktext=[200, 100, 50, 0, 50, 100, 200],
                       title='BPM'),
                   barmode='overlay',
                   bargap=0.1)

data = [go.Bar(y=y,
               x=avg_hr,
               orientation='h',
               name='RUN A',
               text= np.array(avg_hr).astype('int'),
               hoverinfo='text',
               marker=dict(color='powderblue')
               ),
        go.Bar(y=y,
               x=avg_hr2,
               orientation='h',
               name='RUN B',
               text=-1 * np.array(avg_hr2).astype('int'),
               hoverinfo='text',
               marker=dict(color='seagreen')
               )]

py.iplot(dict(data=data, layout=layout))
[136.71428571428572, 139.0, 140.0, 141.0, 141.0, 141.0, 141.5, 143.16666666666666, 150.81656804733728, 154.9826224328594, 166.8156862745098, 175.57564575645756, 176.72254335260115, 175.0]
Out[37]:
In [38]:
out_df_norm = out_df_norm.sort_values("pace min/km")
out_df_norm = out_df_norm.reset_index(drop = True)

avg_hr = [] 
count = []
i = 6
x = 0
heartsum = [0 , 0]
#heartsum[0] = out_df.loc[x, 'heartRate']
#sample distance from original dataset
while ((x < len(out_df_norm)) and not (np.isnan(out_df_norm.loc[x, 'pace min/km']))): 
    while (out_df_norm.loc[x, 'pace min/km'] >= i/2):
        if (heartsum[1] != 0):
            avg_hr.append( heartsum[0] / heartsum[1])
            count.append(heartsum[1])
        else:
            avg_hr.append(0)
            count.append(heartsum[1])
        heartsum = [0, 0]
        i = i+1
    
    heartsum[0] = heartsum[0] + out_df_norm.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x+1
avg_hr.append( heartsum[0] / heartsum[1])
count.append(heartsum[1])

avg_hr2 = []
count2 = count
for x in range(len(avg_hr)):
    avg_hr2.append(avg_hr[x]* (-1))    

y = list(range(6, 20, 1))
for x in range(len(y)):
    y[x]= y[x]/2 
    
fig = go.Figure()

# Edit the layout
fig.update_layout(title='BPM per pace in min/km comparison',
                   plot_bgcolor = 'rgba(0, 0, 0, 0)',
                   paper_bgcolor = 'rgba(0, 0, 0, 0)',
)

fig.update_layout(yaxis=go.layout.YAxis(
                       range=[3, 10],
                       tickvals=[ 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5],
                       ticktext=[ "2.5-3.0", "3.0-3.5", "3.5-4.0", "4.0-4.5", "4.5-5.0", "5.0-5.5", "5.5-6.0", "6.5-7.0", "7.0-7.5", "7.5-8.0", "8.0-8.5", "8.5-9.0", "9.0-9.5"],
                        title='pace in min/km',
                        showgrid=False),
                   xaxis=go.layout.XAxis(
                       range=[-220, 220],
                       tickvals=[-200, -150, -100, -50, 0, 50, 100, 150, 200],
                       ticktext=[200, 150, 100, 50, 0, 50, 100, 150, 200],
                       title='BPM',
                       showgrid=True, gridcolor='LightGrey'),
                   barmode='overlay',
                   bargap=0.1,
                   showlegend=False)
fig.add_trace(go.Bar(y=y,
               x=avg_hr,
               orientation='h',
               name='RUN A',
               text= np.array(count).astype('int'),
               hoverinfo='text',
               marker=dict(color=count, cmin=3, cmax =  max(count+count2), colorscale="Blues", colorbar=dict(title="."),),
               ))
fig.add_trace(go.Bar(y=y,
               x=avg_hr2,
               orientation='h',
               name='RUN B',
               text= np.array(count).astype('int'),
               hoverinfo='text',
               marker=dict(color=count, cmin=3, cmax =  max(count+count2), colorscale="Reds", colorbar=dict(title="number of records", x = 1, y = 0.5, showticklabels= False),)
               ))
fig.show()
#py.iplot(dict(data=data, layout=layout))
#out_df_norm.tail(30)

ALTERNATIVE BAR PLOT

In [39]:
out_df_norm = out_df_norm.sort_values("pace min/km")
out_df_norm = out_df_norm.reset_index(drop = True)

avg_hr = [] 
count = []
i = 6
x = 0
heartsum = [0 , 0]
#heartsum[0] = out_df.loc[x, 'heartRate']
#sample distance from original dataset
while ((x < len(out_df_norm)) and not (np.isnan(out_df_norm.loc[x, 'pace min/km']))): 
    while (out_df_norm.loc[x, 'pace min/km'] >= i/2):
        if (heartsum[1] != 0):
            avg_hr.append( heartsum[0] / heartsum[1])
            count.append(heartsum[1])
        else:
            avg_hr.append(0)
            count.append(heartsum[1])
        heartsum = [0, 0]
        i = i+1
    
    heartsum[0] = heartsum[0] + out_df_norm.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x+1
avg_hr.append( heartsum[0] / heartsum[1])
count.append(heartsum[1])

avg_hr2 = avg_hr
count2 = count
#for x in range(len(avg_hr)):
#    avg_hr2.append(avg_hr[x]* (-1))    

y = list(range(6, 20, 1))
for x in range(len(y)):
    y[x]= y[x]/2 
    
fig = go.Figure()

# Edit the layout
fig.update_layout(title='BPM per pace in min/km comparison',
                   plot_bgcolor = 'rgba(0, 0, 0, 0)',
                   paper_bgcolor = 'rgba(0, 0, 0, 0)',
)

fig.update_layout(yaxis=go.layout.YAxis(
                       range=[3, 10],
                       tickvals=[ 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5],
                       ticktext=[ "2.5-3.0", "3.0-3.5", "3.5-4.0", "4.0-4.5", "4.5-5.0", "5.0-5.5", "5.5-6.0", "6.5-7.0", "7.0-7.5", "7.5-8.0", "8.0-8.5", "8.5-9.0", "9.0-9.5"],
                        title='pace in min/km',
                        showgrid=False),
                   xaxis=go.layout.XAxis(
                       range=[0, 220],
                       tickvals=[0, 50, 100, 150, 200],
                       ticktext=[0, 50, 100, 150, 200],
                       title='BPM',
                       showgrid=True, gridcolor='LightGrey'),
                   barmode='overlay',
                   bargap=0.1,
                   showlegend=False)
fig.add_trace(go.Bar(y=y,
               x=avg_hr,
               orientation='h',
               name='RUN A',
               text= np.array(count).astype('int'),
               hoverinfo='text',
               marker=dict(color=count, cmin=3, cmax =  max(count+count2), colorscale="Blues", colorbar=dict(title="."),),
               ))
fig.add_trace(go.Bar(y=y,
               x=avg_hr2,
               orientation='h',
               name='RUN B',
               text= np.array(count).astype('int'),
               hoverinfo='text',
               marker=dict(color=count, cmin=3, cmax =  max(count+count2), colorscale="Reds", colorbar=dict(title="number of records", x = 1, y = 0.5, showticklabels= False),)
               ))

# Here we modify the tickangle of the xaxis, resulting in rotated labels.
fig.update_layout(barmode='group', xaxis_tickangle=-45)
fig.show()
In [ ]: